here we perform some overall alpha diversity analyses inspecting species richness and diversity as well as determining the fish gill mucus core microbiota via prevalence and abundance. Spatio-temporal of bacterioplankton and fish is found in chapter 4.5 depicting especially the interesting microbiota adapation in migrating fish. First steps of data exploration for modeling alpha diversity are shown in 5. however seem to transfer low information.
before you start: .rs.restartR()
#-
simple richness plot over all samples
##########################
#Observed Diversity Plots#
##########################
for (i in names(pslist)[grepl("ps", names(pslist))]){
require(plyr); require(ggrepel); require(cowplot); require(phyloseq)
#if (OperatingSystem == "Mac" ) {quartz() }
tryCatch({
g <- paste(i); print(i)
#gg <- sub('ps', '', g)
A<- plot_richness(pslist[[i]], x = "SampleID", measures = c("Observed")) + # Shannon
geom_bar(aes(fill = sample_data(pslist[[i]])$Species), stat = "identity", position = "stack") + #fill = Phylum
scale_x_discrete(limits = SSU_Samples[SSU_Samples %in% sample_names(pslist[[i]])]) +
#scale_x_discrete(breaks=factor(sample_data(pslist[[i]])$Replicates, levels=SLOrder)) +
scale_fill_manual(values = col.Palette$col.Palette.Species) +
theme(axis.text.x.bottom = element_text(size=rel(0.8),angle = 45, vjust = 1, hjust = 1),
legend.title = element_text( size = 6),
legend.text = element_text(size = 6),
legend.key.size = unit(0.4, 'cm'),)+
labs(x = element_blank(), y = "Alpha Diversity Measure (non normalized)")
title <- ggdraw() + draw_label_themeRK(paste(g), element = "plot.title",x = 0.05, hjust = 0, vjust = 1)
subtitle <- ggdraw() + draw_label_themeRK(paste("Paired End 16S",sep = " "), element = "plot.subtitle",
x = 0.05, hjust = 0,
vjust = 1)
prow <- cowplot::plot_grid(A, labels = c(""), ncol = 1)
A<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0.04, 0.05, 0.98))
plot(A)
ggsave(A, filename = paste(paste(save_name, Type, paste(g, "Observed-AlphaDiversity", sep="_"), Date, sep="_"), ".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8,
height = 8)
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
## [1] "ps_SSU"
## [1] "ps_WF"
## [1] "ps_Fish"
## [1] "ps_OE"
## [1] "ps_GC"
## [1] "ps_OEWF"
## [1] "ps_GCWF"
Visualizing of diversity is covered in raw and filtered datasets
for (Dataset in names(pslistraw)) {
require(vegan)
spacc_raw_ps <- pslistraw[[Dataset]]
spacc_ps <- pslist[[Dataset]]
Datasetname <- sub("ps_", "", Dataset)
spacc_raw <- vegan::specaccum(otu_table(spacc_raw_ps), method = "random", permutations = 100)
spacc <- vegan::specaccum(otu_table(spacc_ps), method = "random", permutations = 100)
#creating a dataframe for ggplot2
data_spacc_raw <- data.frame(Specimen=spacc_raw$sites, Richness=spacc_raw$richness, SD=spacc_raw$sd)
A<- ggplot() +
geom_point(data=data_spacc_raw, aes(x=Specimen, y=Richness)) +
geom_line(data=data_spacc_raw, aes(x=Specimen, y=Richness)) +
geom_ribbon(data=data_spacc_raw ,aes(x=Specimen, ymin=(Richness-2*SD),ymax=(Richness+2*SD)),alpha=0.2) +
atheme +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#legend.background = element_rect(fill='transparent'), #transparent legend bg
#legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=10),
legend.text=element_text(size=10)) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50")) +
ggtitle(paste(Datasetname, "raw"))
Species_Accumulation_raw <- cowplot::plot_grid(A, labels = c(""), ncol = 1)
data_spacc <- data.frame(Specimen=spacc$sites, Richness=spacc$richness, SD=spacc$sd)
B<- ggplot() +
geom_point(data=data_spacc, aes(x=Specimen, y=Richness)) +
geom_line(data=data_spacc, aes(x=Specimen, y=Richness)) +
geom_ribbon(data=data_spacc,aes(x=Specimen, ymin=(Richness-2*SD),ymax=(Richness+2*SD)),alpha=0.2) +
atheme +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#legend.background = element_rect(fill='transparent'), #transparent legend bg
#legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=10),
legend.text=element_text(size=10)) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50")) +
ggtitle(paste(Datasetname, "abundance filtered"))
Species_Accumulation_Filtered <- cowplot::plot_grid(B, labels = c(""), ncol = 1)
require(vegan)
cowplot::plot_grid(Species_Accumulation_raw, Species_Accumulation_Filtered, labels = c("A", "B"), ncol = 1) -> part_1
ggsave(part_1, filename = paste(paste(save_name, Type, "SpeciesAccumulation", Datasetname, Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 6,height = 9)
plot(part_1)
}
simple plotting of the taxa with highest abundance over time and space clue here are the preserved and automatically picked colors between taxa
##########################
#Barplot with color code#
##########################
#Run once to determine color for both fish species
TaxLevel <- "Genus"
for (Dataset in names(pslist)[grepl("ps_Fish", names(pslist))]){
require(plyr); require(dplyr); require(ggrepel); require(cowplot); require(phyloseq)
Datasetname <- sub("ps_", "", Dataset)
FILENAME <- paste(paste(save_name, "Alpha_BarPlot_Publication", TaxLevel, Datasetname, sep="_"), ".png", sep="")
WIDTH = 10 + length(sample_names(pslist[[Dataset]])) *0.12
if (Datasetname %in% c("GC", "GCWF", "Fish")) {
pslist[[Dataset]] <- prune_samples(sample_names(pslist[[Dataset]])[!grepl("GCSU21BBEB6", sample_names(pslist[[Dataset]]))], pslist[[Dataset]])
} else {pslist[[Dataset]] <- pslist[[Dataset]]}
################################
#Create Relative Abundance Data#
################################
ps_alpha_barplot <- pslist[[Dataset]] %>%
tax_glom(taxrank = TaxLevel) %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
filter(Abundance > 1) %>% # Filter out low abundance taxa
arrange(Genus) %>% # Sort data frame alphabetically by phylum
dplyr::arrange(desc(Abundance))
Order <- SSU_Samples[SSU_Samples %in% sample_names(pslist[[Dataset ]])]
ps_alpha_barplot$ASV <- sub(".*:", "", ps_alpha_barplot$OTU)
ps_alpha_barplot$ASV <- sub('\\.', ' ', ps_alpha_barplot$ASV)
############################
#Create TotalAbundance Data#
############################
phylum_abundance <- ps_alpha_barplot %>%
dplyr::group_by(.data[[TaxLevel]]) %>%
dplyr::summarise(TotalAbundance = sum(Abundance))
ordered_levels <- phylum_abundance %>%
dplyr::arrange(desc(TotalAbundance)) %>%
pull(.data[[TaxLevel]])
ps_alpha_barplot$Taxa <- factor(ps_alpha_barplot[[TaxLevel]], levels = ordered_levels)
ps_alpha_barplot$SampleID <- factor(ps_alpha_barplot$SampleID, levels = SSU_Samples)
##########################
#Subset for Interest ASVs#
##########################
df <- ps_alpha_barplot #[ps_alpha_barplot$OTU %in% GroupOfInterest,]
######################
#Define Phylum Colors#
######################
phylum_colors <- c(
"Actinobacteriota" = "red",
"Cyanobacteria" = "green",
"Bacteroidota" = "darkgreen",
"Proteobacteria1" = "darkorange2",
"Proteobacteria2" = "#663300",
#"Patescibacteria" = "pink",
"Acidobacteriota " = "#FF00CC",
"Planctomycetota" = "purple",
"Verrucomicrobiota" = "blue",
"Chloroflexota" = "cyan",
"Deinococcota" = "#FFFF00",
"Gemmatimonadota" = "#8F7C00",
"Other" = "grey20")
generate_shades <- function(base_color, num_variations) {
color_ramp <- colorRampPalette(c(base_color, "white"))
# Generate a vector of colors
colors <- color_ramp(num_variations+1)
# Create a data frame with the colors
color_palette <- colors[1:(length(colors) - 1)]
return(color_palette)}
##################################################
#Create Color Dataframe for Taxa from PhylaColors#
##################################################
taxonomy_df <- df[c("Phylum", "Taxa")]
#taxonomy_df$ASV <- sub(".*:", "", taxonomy_df$OTU)
taxonomy_df <- taxonomy_df[!duplicated(taxonomy_df$Taxa),]
#############################################
#Order to ordered_levels from Total Abudance#
order_index <- match(ordered_levels, taxonomy_df$Taxa)
taxonomy_df <- taxonomy_df[order_index, ]
print(head(taxonomy_df[c("Phylum", "Taxa")]))
taxonomy_df$Phylum2 <- ifelse(taxonomy_df$Phylum == "Proteobacteria",
paste0("Proteobacteria", ave(seq_along(taxonomy_df$Phylum),
taxonomy_df$Phylum, FUN = function(x) ifelse(seq_along(x) %% 2 == 0, 2, 1))),
taxonomy_df$Phylum)
unique_phyla <- unique(taxonomy_df$Phylum2)
# Assign base colors to unique phyla
#phylum_colors <- setNames(base_colors[1:length(unique_phyla)], unique_phyla)
# Print the mapping of phyla to colors
phylum_cols<- as.data.frame(phylum_colors)
phylum_cols$Phyla <- rownames(phylum_cols)
colored_taxonomy <- data.frame(
Taxa = taxonomy_df$Taxa,
Phylum2 = taxonomy_df$Phylum2,
Color = character(length(taxonomy_df$Taxa)), # Initialize an empty character vector
stringsAsFactors = FALSE)
# Generate shades and assign colors to each taxon
for (ii in seq_along(unique_phyla)) {
phylum <- unique_phyla[ii]
if (phylum %in% names(phylum_colors)) {
base_color <- phylum_colors[[phylum]]
} else {
base_color <- phylum_colors[["Other"]]
}
taxon_indices <- taxonomy_df$Phylum2 == phylum
num_variations <- sum(taxon_indices)
shades <- generate_shades(base_color, pmin(num_variations, 5))
#colored_taxonomy$Color[taxon_indices] <- shades
colored_taxonomy$Color[which(taxon_indices)[1:pmin(num_variations, 5)]]<- shades
}
colored_taxonomy$Color <- gsub("^$", NA, trimws(colored_taxonomy$Color))
colored_taxonomy$Color <- replace_na(colored_taxonomy$Color, "grey50")
##################
# #Some adjustments#
updateColor <- function(taxa, color) {
if (any(colored_taxonomy$Taxa == taxa)) {
colored_taxonomy[colored_taxonomy$Taxa == taxa,]$Color <- color
}
return(colored_taxonomy)
}
colored_taxonomy <- updateColor("Psychrobacter", "#FFCC00")
colored_taxonomy <- updateColor("Polynucleobacter", "#FF6600")
#updateColor("Photobacterium", "darkorange3")
# updateColor("Vibrio", "#FF9966")
#updateColor("Citrobacter", "#FFCC00")
#updateColor("Psychrobacter", "#FFCC00")
# updateColor("Enterobacter", "#993300")
#updateColor("Acinetobacter", "#FF9933")
Alpha_Diversity_df <- left_join(df, colored_taxonomy)
color_mapping <- setNames(Alpha_Diversity_df$Color, Alpha_Diversity_df$Taxa)
levels(Alpha_Diversity_df$Taxa)[!levels(Alpha_Diversity_df$Taxa) %in% ordered_levels[1:34]] <-
"Other"
p <- ggplot(Alpha_Diversity_df,
aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_x") +
atheme2 +
ylab("Relative Abundance [%] \n") + xlab("") +
labs(fill="") +
scale_fill_manual(values = color_mapping) +
#ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance, aes(label = Labels),
#inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
#awhite +
theme(strip.text.y = element_text(angle = 0)) +
#theme(axis.text.x=element_blank(),
#axis.text.y=element_blank(),
#axis.title.y.left = element_blank(),
#axis.ticks.y =element_blank()
#) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#legend.background = element_rect(fill='transparent'), #transparent legend bg
#legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=9),
legend.text=element_text(size=9),
axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
strip.text.y.left = element_text(size = 9,face = "bold"),
axis.title.y.left = element_text(size=12,face = "bold"))
p
COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in% RepOrder]
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(COL, 0.8)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = WIDTH, height = 6)
}
## Phylum Taxa
## 10 Bacteroidota Elizabethkingia
## 204 Proteobacteria Enterobacter
## 196 Proteobacteria Citrobacter
## 12 Proteobacteria Psychrobacter
## 218 Bacteroidota Chryseobacterium
## 53 Verrucomicrobiota Luteolibacter
colored_taxonomy_Fish <- colored_taxonomy
pslist[["Optics"]] <- list()
pslist[["Optics"]][["colored_taxonomy_Fish"]] <- colored_taxonomy_Fish
##############################
#Now use color from both fish#
##############################
TaxLevel <- "Genus"
for (Dataset in names(pslist)[grepl("ps_OE|ps_GC|ps_WF", names(pslist))]){
require(plyr); require(dplyr); require(ggrepel); require(cowplot); require(phyloseq)
Datasetname <- sub("ps_", "", Dataset)
FILENAME <- paste(paste(save_name, "Alpha_BarPlot_Publication", TaxLevel, Datasetname, sep="_"), ".png", sep="")
if (Datasetname %in% c("GC", "OE")) {
WIDTH <- 10 + length(sample_names(pslist[[Dataset]])) *0.12
} else if (Datasetname %in% c("WF")) {
WIDTH <- 5 + length(sample_names(pslist[[Dataset]])) *0.12
} else {
WIDTH <- 10 + length(sample_names(pslist[[Dataset]])) *0.12
}
ps <- pslist[[Dataset]]
if (Datasetname %in% c("GC", "GCWF", "Fish")) {
ps <- prune_samples(sample_names(ps)[!grepl("GCSU21BBEB6", sample_names(ps))], ps)
} else {ps <- ps}
if (Datasetname %in% c("WF", "GCWF", "OEWF")) {
ps <- prune_samples(sample_names(ps)[!grepl("WFSU22MLEB", x =
sample_names(ps ))], ps)
} else {ps<-ps} #Missing physioligical data
################################
#Create Relative Abundance Data#
################################
ps_alpha_barplot <- ps %>%
tax_glom(taxrank = TaxLevel) %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
filter(Abundance > 1) %>% # Filter out low abundance taxa
arrange(Genus) %>% # Sort data frame alphabetically by phylum
dplyr::arrange(desc(Abundance))
ps_alpha_barplot$ASV <- sub(".*:", "", ps_alpha_barplot$OTU)
ps_alpha_barplot$ASV <- sub('\\.', ' ', ps_alpha_barplot$ASV)
############################
#Create TotalAbundance Data#
############################
phylum_abundance <- ps_alpha_barplot %>%
dplyr::group_by(.data[[TaxLevel]]) %>%
dplyr::summarise(TotalAbundance = sum(Abundance))
ordered_levels <- phylum_abundance %>%
dplyr::arrange(desc(TotalAbundance)) %>%
pull(.data[[TaxLevel]])
ps_alpha_barplot$Taxa <- factor(ps_alpha_barplot[[TaxLevel]], levels = ordered_levels)
ps_alpha_barplot$SampleID <- factor(ps_alpha_barplot$SampleID, levels =
SSU_Samples[SSU_Samples %in% sample_names(pslist[[Dataset]])])
##########################
#Subset for Interest ASVs#
##########################
df <- ps_alpha_barplot #[ps_alpha_barplot$OTU %in% GroupOfInterest,]
##################################################
#Create Color Dataframe for Taxa from PhylaColors#
##################################################
taxonomy_df <- df[c("Phylum", "Taxa")]
#taxonomy_df$ASV <- sub(".*:", "", taxonomy_df$OTU)
taxonomy_df <- taxonomy_df[!duplicated(taxonomy_df$Taxa),]
#############################################
#Order to ordered_levels from Total Abudance#
order_index <- match(ordered_levels, taxonomy_df$Taxa)
taxonomy_df <- taxonomy_df[order_index, ]
print(head(taxonomy_df[c("Phylum", "Taxa")]))
taxonomy_df$Phylum2 <- ifelse(taxonomy_df$Phylum == "Proteobacteria",
paste0("Proteobacteria", ave(seq_along(taxonomy_df$Phylum),
taxonomy_df$Phylum, FUN = function(x) ifelse(seq_along(x) %% 2 == 0, 2, 1))),
taxonomy_df$Phylum)
unique_phyla <- unique(taxonomy_df$Phylum2)
# Assign base colors to unique phyla
#phylum_colors <- setNames(base_colors[1:length(unique_phyla)], unique_phyla)
# Print the mapping of phyla to colors
phylum_cols<- as.data.frame(phylum_colors)
phylum_cols$Phyla <- rownames(phylum_cols)
colored_taxonomy <- data.frame(
Taxa = taxonomy_df$Taxa,
Phylum2 = taxonomy_df$Phylum2,
Color = character(length(taxonomy_df$Taxa)), # Initialize an empty character vector
stringsAsFactors = FALSE)
# Generate shades and assign colors to each taxon
for (ii in seq_along(unique_phyla)) {
phylum <- unique_phyla[ii]
if (phylum %in% names(phylum_colors)) {
base_color <- phylum_colors[[phylum]]
} else {
base_color <- phylum_colors[["Other"]]
}
taxon_indices <- taxonomy_df$Phylum2 == phylum
num_variations <- sum(taxon_indices)
shades <- generate_shades(base_color, pmin(num_variations, 5))
#colored_taxonomy$Color[taxon_indices] <- shades
colored_taxonomy$Color[which(taxon_indices)[1:pmin(num_variations, 5)]]<- shades
}
colored_taxonomy$Color <- gsub("^$", NA, trimws(colored_taxonomy$Color))
colored_taxonomy$Color <- replace_na(colored_taxonomy$Color, "grey50")
##################
# #Some adjustments#
updateColor <- function(taxa, color) {
if (any(colored_taxonomy$Taxa == taxa)) {
colored_taxonomy[colored_taxonomy$Taxa == taxa,]$Color <- color
}
return(colored_taxonomy)
}
colored_taxonomy <- updateColor("Psychrobacter", "#FFCC00")
colored_taxonomy <- updateColor("Polynucleobacter", "#FF6600")
#updateColor("Photobacterium", "darkorange3")
# updateColor("Vibrio", "#FF9966")
#updateColor("Citrobacter", "#FFCC00")
#updateColor("Psychrobacter", "#FFCC00")
# updateColor("Enterobacter", "#993300")
#updateColor("Acinetobacter", "#FF9933")
Alpha_Diversity_df <- left_join(df, colored_taxonomy)
#################################
#Update to colored_taxonomy_Fish#
#################################
for (i in 1:nrow(Alpha_Diversity_df)) {
matching_row <- colored_taxonomy_Fish[colored_taxonomy_Fish$Taxa %in% Alpha_Diversity_df$Taxa[i], ]
if (nrow(matching_row) > 0) {
Alpha_Diversity_df$Color[i] <- matching_row$Color
}
}
color_mapping <- setNames(Alpha_Diversity_df$Color, Alpha_Diversity_df$Taxa)
levels(Alpha_Diversity_df$Taxa)[!levels(Alpha_Diversity_df$Taxa) %in% ordered_levels[1:34]] <-
"Other"
if (Datasetname == "WF") {
p <- ggplot(Alpha_Diversity_df,
aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Season, levels = SeasonOrder), drop = TRUE, scale = "free",
space = "free_x")
COL <- col.Palette$col.Palette.Season[names(col.Palette$col.Palette.Season) %in%
unique(sample_data(ps)$Season)]
} else {
p <- ggplot(Alpha_Diversity_df,
aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_x")
COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in%
unique(sample_data(ps)$Reps)]
}
p <- p +
atheme2 +
ylab("Relative Abundance [%] \n") + xlab("") +
labs(fill="") +
scale_fill_manual(values = color_mapping) +
#ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance, aes(label = Labels),
#inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
#awhite +
theme(strip.text.y = element_text(angle = 0)) +
#theme(axis.text.x=element_blank(),
#axis.text.y=element_blank(),
#axis.title.y.left = element_blank(),
#axis.ticks.y =element_blank()
#) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#legend.background = element_rect(fill='transparent'), #transparent legend bg
#legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=9),
legend.text=element_text(size=9),
axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
strip.text.y.left = element_text(size = 9,face = "bold"),
axis.title.y.left = element_text(size=12,face = "bold"))
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(COL, 0.8)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = WIDTH, height = 6)
plot(Alpha_Diversity_BarPlot )
}
## Phylum Taxa
## 2 Actinobacteriota hgcI clade
## 3 Actinobacteriota CL500-29 marine group
## 5 Proteobacteria Limnohabitans
## 4 Verrucomicrobiota Persicirhabdus
## 55 Proteobacteria Polynucleobacter
## 28 Nitrospirota Nitrospira
## Phylum Taxa
## 3 Bacteroidota Elizabethkingia
## 143 Proteobacteria Enterobacter
## 165 Proteobacteria Citrobacter
## 4 Proteobacteria Psychrobacter
## 35 Verrucomicrobiota Luteolibacter
## 133 Bacteroidota Chryseobacterium
## Phylum Taxa
## 8 Bacteroidota Elizabethkingia
## 78 Proteobacteria Enterobacter
## 77 Proteobacteria Citrobacter
## 4 Proteobacteria Polynucleobacter
## 95 Bacteroidota Chryseobacterium
## 99 Proteobacteria Psychrobacter
## Phylum Taxa
## 3 Bacteroidota Elizabethkingia
## 148 Proteobacteria Enterobacter
## 175 Proteobacteria Citrobacter
## 4 Proteobacteria Psychrobacter
## 35 Verrucomicrobiota Luteolibacter
## 135 Bacteroidota Chryseobacterium
## Phylum Taxa
## 8 Bacteroidota Elizabethkingia
## 79 Proteobacteria Enterobacter
## 76 Proteobacteria Citrobacter
## 4 Proteobacteria Polynucleobacter
## 99 Bacteroidota Chryseobacterium
## 102 Proteobacteria Psychrobacter
Some thoughts from: McMurtrie, J., Alathari, S., Chaput, D. L., Bass, D., Ghambi, C., Nagoli, J., … & Tyler, C. R. (2021). Relationships between pond water and tilapia skin microbiomes in aquaculture ponds in Malawi. bioRxiv.McMurtrie, J., Alathari, S., Chaput, D. L., Bass, D., Ghambi, C., Nagoli, J., … & Tyler, C. R. (2021). Relationships between pond water and tilapia skin microbiomes in aquaculture ponds in Malawi. bioRxiv. Core microbiome analysis seeks to find taxa conserved between samples above a defined prevalence threshold. In this case we are interested in finding the core MB of the Fish Swab community which is present in fish across different Locations sites. Core calculations will be performed with the microbiome package. We will then visualise these core taxa as a heatmap using pheatmap. This will also reveal abundances across both Loc and fish samples, indicating if the fish core taxa are in high abundance in the Loc water and therefore potentially non-residents. Final visualisations for the finished figure requires some manual editing in Inkscape.
R Setup Libraries
Import phyloseq objects Weight up in choosing what taxa level to study. The full ASV set is large and requires a low core threshold as there is a reasonable degree of variation in ASVs e.g. likely different strains. However, in the core analysis we are most interested in the major types of organisms appearing to be common in either system. For instance we could have three individual ASVs of the same genus, each a key photosynthetic but only one is dominant per pond site, collectively though they are occupying the same niche. This is obviously a generalisation and not applicable to all scenarios. Most core studies I have seen group at genus level. I did explore the use of tip_gloming instead to use a phylogenetic informed grouping level, rather than arbitrary taxonomy, but this compromises resolution of taxonomic classifications that I am not willing to accept.
for (Dataset in c(names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE|ps_Fish", names(pslist)[!grepl("WF", names(pslist))])], "ps_WF")) {
ps <- pslist[[Dataset]]
Datasetname <- sub("ps_", "", Dataset)
pslist[[paste("Core", Datasetname, sep="_")]] <- list()
require(phyloseq)
core_taxa_standard <- ps %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
microbiome::core_members(., detection = 0.0001, prevalence = 90/100)
#ps_rel <- microbiome::transform(pslist[[Dataset]], "compositional")
#core_taxa_standard <- microbiome::core_members(ps_rel, detection = 0.0001, prevalence = 90/100)
#ps_core <- microbiome::core(ps_rel, detection = 0.0001, prevalence = .9)
ps_core <- prune_taxa(core_taxa_standard, ps)
core_taxa <- microbiome::taxa(ps_core)
tax_mat <- tax_table(ps_core)
tax_df <- as.data.frame(tax_mat)
tax_df$ASV <- rownames(tax_df)
# select taxonomy of only those OTUs that are core memebers based on the thresholds that were used.
core_taxa_class <- dplyr::filter(tax_df, rownames(tax_df) %in% core_taxa)
knitr::kable(head(core_taxa_class))
ps_rel_gen <- microbiome::aggregate_taxa(ps_core, "Genus")
# Check if any taxa with no genus classification. aggregate_taxa will merge all unclassified to Unknown
any(taxa_names(ps_rel_gen) == "Unknown")
# Remove Unknown
ps_rel_gen <- subset_taxa(ps_rel_gen, Genus!="Unknown")
library(RColorBrewer)
prevalences <- seq(.05, 1, .05)
detections <- round(10^seq(log10(1e-5), log10(.2), length = 10), 3)
# p1 <- microbiome::plot_core(ps_rel_gen,
# plot.type = "heatmap",
# colours = rev(brewer.pal(5, "RdBu")),
# prevalences = prevalences,
# detections = detections, min.prevalence = .8) +
# xlab("Detection Threshold (Relative Abundance (%))")
# p1 <- p1 + theme_bw() + ylab("ASVs")
# p1
pslist[[paste("Core", Datasetname, sep="_")]] <- core_taxa_class
}
TaxLevel <- "ASV"
flipped <- T
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])]) {
require(plyr); require(dplyr); require(ggrepel); require(cowplot); require(phyloseq)
Datasetname <- sub("ps_", "", Dataset)
ps <- pslist[[Dataset]]
ps2 <- ps #pslist[[paste(Dataset, "WF", sep="")]]
core_taxa_standard <- ps %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
microbiome::core_members(., detection = 0.0001, prevalence = 90/100)
GroupOfInterest <- core_taxa_standard
FILENAME <- paste(paste(save_name, "Core_BarPlot_noWF", Datasetname, sep="_"), ".png", sep="")
if (Datasetname %in% c("GC", "OE")) {
WIDTH <- 10 + length(sample_names(pslist[[Dataset]])) *0.12
} else if (Datasetname %in% c("WF")) {
WIDTH <- 5 + length(sample_names(pslist[[Dataset]])) *0.12
} else {
WIDTH <- 10 + length(sample_names(pslist[[Dataset]])) *0.12
}
################################
#Create Relative Abundance Data#
################################
ps_alpha_barplot <- ps2 %>%
#tax_glom(taxrank = TaxLevel) %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
#filter(Abundance > 1) %>% # Filter out low abundance taxa
arrange(Genus) %>% # Sort data frame alphabetically by phylum
dplyr::arrange(desc(Abundance))
ps_alpha_barplot$ASV <- sub(".*:", "", ps_alpha_barplot$OTU)
#ps_alpha_barplot$ASV <- sub('\\.', ' ', ps_alpha_barplot$ASV)
############################
#Create TotalAbundance Data#
############################
# phylum_abundance <- ps_alpha_barplot %>%
# dplyr::group_by(.data[[TaxLevel]]) %>%
# dplyr::summarise(TotalAbundance = sum(Abundance))
# ordered_levels <- phylum_abundance %>%
# dplyr::arrange(desc(TotalAbundance)) %>%
# pull(.data[[TaxLevel]])
#
# ps_alpha_barplot$Taxa <- factor(ps_alpha_barplot[[TaxLevel]], levels = ordered_levels)
ps_alpha_barplot$SampleID <- factor(ps_alpha_barplot$SampleID, levels =
SSU_Samples[SSU_Samples %in% sample_names(ps2)])
SSU_Samples[SSU_Samples %in% sample_names(ps2)]
##########################
#Subset for Interest ASVs#
##########################
df <- ps_alpha_barplot #[ps_alpha_barplot$OTU %in% GroupOfInterest,]
#df <- df[df$OTU %in% GroupOfInterest,]
df$Abundance[!(df$OTU %in% GroupOfInterest)] <- 0
phylum_abundance <- df %>%
dplyr::group_by(.data[[TaxLevel]]) %>%
dplyr::summarise(TotalAbundance = sum(Abundance))
ordered_levels <- phylum_abundance %>%
dplyr::arrange(desc(TotalAbundance)) %>%
pull(.data[[TaxLevel]])
df$Taxa <- factor(df[[TaxLevel]], levels = ordered_levels)
Alpha_Diversity_df <- df
Alpha_Diversity_df$Colors <- NA
Alpha_Diversity_df$Reps <- factor(Alpha_Diversity_df$Reps, levels = RepOrder[RepOrder %in% Alpha_Diversity_df$Reps])
#################################
#Update to colored_taxonomy_Fish#
#################################
taxa_levels <- c("Genus", "Family", "Order", "Class", "Phylum")
taxa_level2 <- c("Taxa", "Phylum2")
Alpha_Diversity_df$Color <- "grey"
# Loop through each row of Alpha_Diversity_df
for (i in 1:nrow(Alpha_Diversity_df)) {
matching_color <- NA # Initialize matching color to NA
# Loop through each taxonomic level
for (level in taxa_levels) {
matching_row <- colored_taxonomy_Fish[colored_taxonomy_Fish$Taxa %in%
Alpha_Diversity_df[[level]][i], ]
# If there's a match, update matching_color and exit the loop
if (nrow(matching_row) > 0) {
matching_color <- matching_row$Color
break
}
}
# If there's no match at any level, try matching without numbers
if (is.na(matching_color)) {
matching_row <- colored_taxonomy_Fish[gsub("\\d", "", colored_taxonomy_Fish$Phylum2) %in%
Alpha_Diversity_df[["Phylum"]][i], ]
if (nrow(matching_row) > 0) {
matching_color <- matching_row$Color
}
}
# Assign the matching color to the corresponding row in Alpha_Diversity_df
Alpha_Diversity_df$Color[i] <- matching_color
}
color_mapping <- setNames(Alpha_Diversity_df$Color, Alpha_Diversity_df$Taxa)
color_mapping[["Candidatus Megaira"]] <- "purple"
color_mapping[["Acinetobacter.lwoffii"]] <- "#996600"
color_mapping[["Acinetobacter.johnsonii"]] <- "#663300"
color_mapping[["Shewanella"]] <- "#FF3366"
color_mapping[["Shewanella.baltica"]] <- "#FF0099"
color_mapping[["Shewanella.putrefaciens"]] <-"#FF0066"
color_mapping[["Photobacterium"]] <- "#FFF000"
color_mapping[["Aeromonas"]] <- "#FFCC00"
levels(Alpha_Diversity_df$Taxa)[!levels(Alpha_Diversity_df$Taxa) %in% head(unique(sub(".*:", "", GroupOfInterest)), 34) ] <-
"Other"
if (flipped == FALSE) {
if (Datasetname == "WF") {
p <- ggplot(Alpha_Diversity_df,
aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Season, levels = SeasonOrder), drop = TRUE, scale = "free",
space = "free_x")
COL <- col.Palette$col.Palette.Season[names(col.Palette$col.Palette.Season) %in%
unique(sample_data(ps2)$Season)]
} else {
p <- ggplot(Alpha_Diversity_df,
aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_x")
COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in% unique(sample_data(ps2)$Reps)]
}
p <- p +
atheme +
ylab("Relative Abundance [%] \n") + xlab("") +
labs(fill="") +
scale_fill_manual(values = color_mapping) +
ylim(0, 60) +
#ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance, aes(label = Labels),
#inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
#awhite +
theme(strip.text.y = element_text(angle = 0)) +
#theme(axis.text.x=element_blank(),
#axis.text.y=element_blank(),
#axis.title.y.left = element_blank(),
#axis.ticks.y =element_blank()
#) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#legend.background = element_rect(fill='transparent'), #transparent legend bg
#legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=9),
legend.text=element_text(size=9),
axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
strip.text.y.left = element_text(size = 9,face = "bold"),
axis.title.y.left = element_text(size=12,face = "bold"))
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(COL, 0.8)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300,
width = WIDTH, height = 6)
} else if (flipped == TRUE) {
# New facet label names for supp variable
COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in% unique(sample_data(ps2)$Reps)]
Short.labs <- gsub("[^0-9]", "", Alpha_Diversity_df$Reps)
names(Short.labs) <- Alpha_Diversity_df$Reps
p <- ggplot(Alpha_Diversity_df, aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
coord_flip() +
#facet_grid(rows = vars(Reps), drop = TRUE, scale = "free", space = "free_x", switch = "y")
facet_grid(rows = vars(Reps), drop = TRUE, scale = "free", space = "free_y", switch = "y",
labeller = labeller(Reps = Short.labs))
p <- p +
ylab("Relative Abundance [%] \n") + xlab("") +
labs(fill="") + #atheme+
scale_fill_manual(values = color_mapping) +
ylim(0, 80) +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
) +
theme(axis.title.x.bottom = element_text(color="grey13", size=15, face ="bold"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=9),
legend.text=element_text(size=9),
axis.text.x.bottom = element_text(size= 15, face = "bold", angle = 45, hjust = 1),
#strip.text.y.left = element_text(size = 9,face = "bold"),
#axis.title.y.left = element_text(size=12,face = "bold")
) +
theme(legend.position = "none")
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-l', g$layout$name))
fills <- alpha(COL, 0.8)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
Core_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
Core_BarPlot <- plot_grid(Core_BarPlot, ncol = 1, rel_heights = c(100))
ggsave(Core_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 2.7, height = 10)
plot(Core_BarPlot)
}
}
#-
Analyzing here the taxa that are shared between bacterioplankton and the fish gill mucus, in the first step we determine the “most shared” taxa by geometric mean of overlapping taxa between datasets In the second step we run mantel tests for the biological samples replicates and the bacterioplankton to see which groups show the strongest overall matrix correlation Finally we plot the shared taxa between bacterioplankton and fish mucus but excluding the fish gill core taxa which we saw before highly abundant in the fish but rare in the bacterioplankton
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])]) {
Datasetname <- sub("ps_","", Dataset)
ps <- pslist[[paste("ps", Datasetname, sep="_")]]
SampleSums <- ps %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
phyloseq::otu_table() %>%
as.data.frame() %>%
rownames_to_column(var = "SampleID") %>%
dplyr::left_join(SAMDF16S, by = c("SampleID" = "SampleID")) %>%
dplyr::group_by(SampleID) %>%
dplyr::summarise(dplyr::across(rownames(tax_table(ps)), mean, na.rm = TRUE)) %>%
t() %>%
as.data.frame() %>%
`colnames<-`(.[1, ]) %>%
.[-1, ] %>%
stats::setNames(paste0("avg_", colnames(.))) %>%
mutate_all(as.numeric) %>%
round(., digits = 2) %>%
rownames_to_column(var="ASV")
SeasonSums <- ps %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
phyloseq::otu_table() %>%
as.data.frame() %>%
rownames_to_column(var = "SampleID") %>%
dplyr::left_join(SAMDF16S, by = c("SampleID" = "SampleID")) %>%
dplyr::group_by(Season) %>%
dplyr::summarise(dplyr::across(rownames(tax_table(ps)), mean, na.rm = TRUE)) %>%
t() %>%
as.data.frame() %>%
`colnames<-`(.[1, ]) %>%
.[-1, ] %>%
stats::setNames(paste0("avg_", colnames(.))) %>%
mutate_all(as.numeric) %>%
round(., digits = 2) %>%
rownames_to_column(var="ASV")
RepsSums <- ps %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
phyloseq::otu_table() %>%
as.data.frame() %>%
rownames_to_column(var = "SampleID") %>%
dplyr::left_join(SAMDF16S, by = c("SampleID" = "SampleID")) %>%
dplyr::group_by(Replicates) %>%
dplyr::summarise(dplyr::across(rownames(tax_table(ps)), mean, na.rm = TRUE)) %>%
t() %>%
as.data.frame() %>%
`colnames<-`(.[1, ]) %>%
.[-1, ] %>%
stats::setNames(paste0("avg_", colnames(.))) %>%
mutate_all(as.numeric) %>%
round(., digits = 2) %>%
rownames_to_column(var="ASV")
##########################
#Fish vs Bacterioplankton#
##########################
WF_ASVmeans <- pslist$ps_WF %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
phyloseq::otu_table() %>%
t() %>%
as.data.frame() %>%
dplyr::mutate(ASVmeans_WF = rowMeans(.)) %>%
dplyr::mutate(ASV = rownames(.)) %>%
dplyr::select(ASV, ASVmeans_WF)
Fish_ASVmeans <- ps %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
phyloseq::otu_table() %>%
t() %>%
as.data.frame() %>%
dplyr::mutate(ASVmeans_Fish = rowMeans(.)) %>%
dplyr::mutate(ASV = rownames(.)) %>%
dplyr::select(ASV, ASVmeans_Fish)
#Merge values on TAXON level
Fish_ASVmeans$Taxon <- sub("ASV\\d+:", "", Fish_ASVmeans$ASV)
Fish_Taxonmeans <- Fish_ASVmeans %>%
dplyr::group_by(Taxon) %>%
dplyr::summarize(Taxon_means_Fish = sum(ASVmeans_Fish)) %>%
dplyr::arrange(desc(Taxon_means_Fish))
WF_ASVmeans$Taxon <- sub("ASV\\d+:", "", WF_ASVmeans$ASV)
WF_Taxonmeans <- WF_ASVmeans %>%
dplyr::group_by(Taxon) %>%
dplyr::summarize(Taxon_means_WF = sum(ASVmeans_WF)) %>%
dplyr::arrange(desc(Taxon_means_WF))
#Combine Datasets
Overlapping_Fish <- Fish_Taxonmeans[Fish_Taxonmeans$Taxon %in% WF_Taxonmeans$Taxon,]
Overlapping_WF <- WF_Taxonmeans[WF_Taxonmeans$Taxon %in% Fish_Taxonmeans$Taxon,]
Overlapping <- Overlapping_Fish %>% left_join(Overlapping_WF)
###########################################################
#Take geometric mean of overlapping taxa in fish and Water#
print(paste("Most overlapping taxa between", Datasetname, "and Bakterioplankton"))
print(Overlapping %>%
dplyr::mutate(CombinedAbundance = sqrt(Taxon_means_Fish * Taxon_means_WF)) %>%
dplyr::arrange(desc(CombinedAbundance)))
#################################
#Overlapping ASVs betweem biomes#
#################################
#Combine Datasets
Overlapping_Fish_ASV <- Fish_ASVmeans[Fish_ASVmeans$ASV %in% WF_ASVmeans$ASV ,]
Overlapping_WF_ASV <- WF_ASVmeans[WF_ASVmeans$ASV %in% Fish_ASVmeans$ASV ,]
Overlapping_ASV <- Overlapping_Fish_ASV %>% left_join(Overlapping_WF_ASV)
# print(paste("Relative Abundance of Overlapping taxa between biomes in", Datasetname))
# print(sum(Overlapping$ASVmeans))
print(paste("Relative Abundance of Overlapping taxa between biomes in Bacterioplankton"))
print(sum(WF_ASVmeans[WF_ASVmeans$ASV %in% Overlapping_ASV$ASV,]$ASVmeans))
print(paste("Relative Abundance of Overlapping taxa between biomes in Bacterioplankton and", Datasetname))
RepsSums_BiomeSums <- RepsSums[RepsSums$ASV %in% Overlapping_ASV$ASV,] %>%
dplyr::summarize(across(starts_with("avg_"), sum, na.rm = TRUE))%>%
gather(., key = "Replicates", value = "SummedAbundance") %>%
dplyr::arrange(desc(SummedAbundance))
print(RepsSums_BiomeSums)
print(mean(RepsSums_BiomeSums$SummedAbundance))
RepsSums_BiomeSums_NoCore <- RepsSums[RepsSums$ASV %in% setdiff(Overlapping_ASV$ASV, pslist[[paste("Core", Datasetname, sep="_")]]$ASV),] %>%
dplyr::summarize(across(starts_with("avg_"), sum, na.rm = TRUE))%>%
gather(., key = "Replicates", value = "SummedAbundance") %>%
dplyr::arrange(desc(SummedAbundance))
print(RepsSums_BiomeSums_NoCore)
print(mean(RepsSums_BiomeSums_NoCore$SummedAbundance))
}
## [1] "Most overlapping taxa between OE and Bakterioplankton"
## # A tibble: 247 × 4
## Taxon Taxon_means_Fish Taxon_means_WF CombinedAbundance
## <chr> <dbl> <dbl> <dbl>
## 1 Luteolibacter 4.09 1.58 2.54
## 2 Persicirhabdus 1.00 2.27 1.51
## 3 Flavobacterium 1.10 1.39 1.24
## 4 CL500-29 marine group 0.288 4.88 1.19
## 5 Elizabethkingia 16.7 0.0642 1.04
## 6 TRA3-20 0.549 1.44 0.889
## 7 Ilumatobacter 0.753 1.00 0.868
## 8 Halioglobus 0.468 1.46 0.828
## 9 Rhizobiales Incertae Sedis 0.400 0.986 0.628
## 10 Vicinamibacteraceae 0.262 1.45 0.617
## # ℹ 237 more rows
## [1] "Relative Abundance of Overlapping taxa between biomes in Bacterioplankton"
## [1] 48.71117
## [1] "Relative Abundance of Overlapping taxa between biomes in Bacterioplankton and OE"
## Replicates SummedAbundance
## 1 avg_OESP22MG 68.64
## 2 avg_OESU21MG 60.00
## 3 avg_OESP22BB 59.34
## 4 avg_OESP22ML 56.59
## 5 avg_OESU21BB 54.88
## 6 avg_OESP22SS 50.35
## 7 avg_OEAU21ML 49.69
## 8 avg_OESU21SS 49.61
## 9 avg_OESU21ML 49.01
## 10 avg_OESU21TW 47.51
## 11 avg_OEWI22SS 44.64
## 12 avg_OESP22TW 43.75
## 13 avg_OEWI22ML 43.43
## 14 avg_OEWI22MG 41.23
## 15 avg_OEWI22TW 40.13
## 16 avg_OEAU21TW 39.49
## 17 avg_OEWI22BB 36.43
## 18 avg_OEAU21SS 31.63
## 19 avg_OEAU21BB 28.21
## 20 avg_OEAU21MG 20.46
## [1] 45.751
## Replicates SummedAbundance
## 1 avg_OEAU21ML 47.47
## 2 avg_OESP22MG 40.21
## 3 avg_OEAU21TW 36.97
## 4 avg_OESU21ML 36.58
## 5 avg_OEAU21SS 29.07
## 6 avg_OESU21TW 27.65
## 7 avg_OESP22ML 26.26
## 8 avg_OEAU21BB 26.25
## 9 avg_OESU21MG 25.65
## 10 avg_OESU21SS 22.27
## 11 avg_OEWI22ML 20.85
## 12 avg_OESP22BB 20.57
## 13 avg_OESU21BB 19.60
## 14 avg_OEWI22MG 17.80
## 15 avg_OESP22SS 17.21
## 16 avg_OEWI22SS 16.63
## 17 avg_OEWI22TW 16.42
## 18 avg_OESP22TW 16.13
## 19 avg_OEAU21MG 15.28
## 20 avg_OEWI22BB 15.06
## [1] 24.6965
## [1] "Most overlapping taxa between GC and Bakterioplankton"
## # A tibble: 220 × 4
## Taxon Taxon_means_Fish Taxon_means_WF CombinedAbundance
## <chr> <dbl> <dbl> <dbl>
## 1 Polynucleobacter 4.40 0.825 1.91
## 2 Luteolibacter 1.09 1.58 1.31
## 3 Elizabethkingia 17.8 0.0642 1.07
## 4 Rhodobacteraceae 2.18 0.509 1.05
## 5 Persicirhabdus 0.438 2.27 0.997
## 6 Flavobacterium 0.582 1.39 0.901
## 7 TRA3-20 0.537 1.44 0.880
## 8 CL500-29 marine group 0.134 4.88 0.810
## 9 Ilumatobacter 0.531 1.00 0.729
## 10 Halioglobus 0.325 1.46 0.689
## # ℹ 210 more rows
## [1] "Relative Abundance of Overlapping taxa between biomes in Bacterioplankton"
## [1] 41.39703
## [1] "Relative Abundance of Overlapping taxa between biomes in Bacterioplankton and GC"
## Replicates SummedAbundance
## 1 avg_GCAU21TW 66.12
## 2 avg_GCAU21SS 58.72
## 3 avg_GCSP22BB 55.74
## 4 avg_GCAU21ML 54.60
## 5 avg_GCSU21BB 53.81
## 6 avg_GCSP22ML 50.78
## 7 avg_GCSP22TW 50.76
## 8 avg_GCWI22SS 50.74
## 9 avg_GCWI22BB 49.80
## 10 avg_GCSU21ML 46.99
## 11 avg_GCSP22SS 45.33
## 12 avg_GCAU21BB 42.48
## 13 avg_GCSU21TW 36.79
## 14 avg_GCWI22ML 35.65
## 15 avg_GCSU21SS 35.43
## [1] 48.916
## Replicates SummedAbundance
## 1 avg_GCSU21ML 32.26
## 2 avg_GCAU21TW 31.37
## 3 avg_GCWI22BB 24.46
## 4 avg_GCSU21TW 20.04
## 5 avg_GCWI22SS 17.25
## 6 avg_GCAU21ML 15.10
## 7 avg_GCSU21BB 13.81
## 8 avg_GCAU21SS 12.78
## 9 avg_GCSP22TW 10.66
## 10 avg_GCAU21BB 10.60
## 11 avg_GCSP22ML 10.31
## 12 avg_GCSP22BB 10.06
## 13 avg_GCSU21SS 9.63
## 14 avg_GCWI22ML 8.45
## 15 avg_GCSP22SS 8.24
## [1] 15.668
####################################################
#Mantel test Bacterioplankton vs Fish per Replicate#
####################################################
Bacterioplankton_Mantel_list <- list()
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])]) {
Datasetname <- sub("ps_","", Dataset)
Bacterioplankton_Mantel_list[[Datasetname]] <- list()
ps <- pslist[[paste("ps", Datasetname, sep="_")]]
RepsSums <- ps %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>%
phyloseq::otu_table() %>%
as.data.frame() %>%
rownames_to_column(var = "SampleID") %>%
dplyr::left_join(SAMDF16S, by = c("SampleID" = "SampleID")) %>%
dplyr::group_by(Replicates) %>%
dplyr::summarise(dplyr::across(rownames(tax_table(ps)), mean, na.rm = TRUE)) %>%
t() %>%
as.data.frame() %>%
`colnames<-`(.[1, ]) %>%
.[-1, ] %>%
stats::setNames(paste0("avg_", colnames(.))) %>%
mutate_all(as.numeric) %>%
round(., digits = 2) %>%
rownames_to_column(var="ASV")
RepsSums$Taxon <- sub("ASV\\d+:", "", RepsSums$ASV)
RepsSums_Taxonmeans <- RepsSums %>%
dplyr::group_by(Taxon) %>%
dplyr::summarize(across(starts_with("avg_"), sum, na.rm = TRUE))
RepsSums_Taxonmeans <- RepsSums_Taxonmeans %>%
dplyr::rowwise() %>%
dplyr::mutate(RowSum = sum(c_across(starts_with("avg_")), na.rm = TRUE)) %>%
dplyr::arrange(desc(RowSum))
merged_df <- inner_join(RepsSums_Taxonmeans, WF_Taxonmeans, by = "Taxon")
merged_df2 <- as.data.frame(merged_df)
rownames(merged_df2) <- merged_df2$Taxon
merged_df2 <- merged_df2[,grepl("avg_", colnames(merged_df2))]
colnames(merged_df2) <- sub("avg_", "", colnames(merged_df2))
TAXA <- as.data.frame(tax_table(ps))
TAXA$Taxa <- sub("ASV\\d+:", "", rownames(TAXA ))
TAXA <- TAXA[!duplicated(TAXA$Taxa),][c("Phylum", "Taxa")]
long_df<- merged_df2 %>%
pivot_longer(cols = everything(),
names_to = "Replicates",
values_to = "Abundance")
long_df <- cbind(Taxa = rownames(merged_df2), long_df)
long_df<- long_df %>%
left_join(SAMDF16S[!duplicated(SAMDF16S$Replicates),]) %>%
left_join(TAXA)
#long_df$Abundance[!(long_df$Taxa %in% GroupOfInterest)] <- 0
phylum_abundance <- long_df %>%
dplyr::group_by(.data[["Taxa"]]) %>%
dplyr::summarise(TotalAbundance = sum(Abundance))
ordered_levels <- phylum_abundance %>%
dplyr::arrange(desc(TotalAbundance)) %>%
pull(.data[["Taxa"]])
long_df $Taxa <- factor(long_df[["Taxa"]], levels = ordered_levels)
calculate_correlations <- function(df, columns, comparison_column) {
correlations <- sapply(columns, function(col) {
stats::cor(df[[col]], df[[comparison_column]], use = "complete.obs",
method = c("pearson"))
})
return(correlations)
}
# Get the columns you want to compare
columns_to_compare <- names(RepsSums_Taxonmeans)[grep("^avg_", names(RepsSums_Taxonmeans))]
# Calculate the correlations
correlations <- calculate_correlations(merged_df, columns_to_compare, "Taxon_means_WF")
correlations %>%
as.data.frame() %>%
dplyr::arrange(desc(correlations))
#############################
#Mantel Test on every column#
#############################
mantel_results <- lapply(columns_to_compare, function(COLUM) {
# Create distance matrices for the column and Taxon_means_WF
dist_matrix_col <- as.matrix(dist(merged_df[[COLUM]], method = "euclidean"))
dist_matrix_wf <- as.matrix(dist(merged_df$Taxon_means_WF, method = "euclidean"))
# Perform the Mantel test
mantel_test <- mantel(dist_matrix_col, dist_matrix_wf, permutations = 999)
return(c(mantel_test$statistic, mantel_test$signif))
})
mantel_results_df <- as.data.frame(do.call(rbind, mantel_results))
colnames(mantel_results_df) <- c("Mantel_Statistic", "P_Value")
mantel_results_df <- cbind(Column = columns_to_compare, mantel_results_df)
# Order the results by Mantel statistic
mantel_results_df <- mantel_results_df %>%
dplyr::arrange(desc(Mantel_Statistic))
mantel_results_df$Replicates <- sub("avg_", "", mantel_results_df$Column)
mantel_results_df <- mantel_results_df %>%
dplyr::left_join(SAMDF16S[SAMDF16S$Species %in% Datasetname & !duplicated(SAMDF16S$Replicates),])
mantel_results_df$Reps <- factor(mantel_results_df$Reps, levels = RepOrder)
Bacterioplankton_Mantel_list[[Datasetname]] <- mantel_results_df
}
Bacterioplankton_Mantel <- rbind(Bacterioplankton_Mantel_list$OE, Bacterioplankton_Mantel_list$GC)
#####################
#Plot Mantel Results#
#####################
Bacterioplankton_Mantel$Season <- factor(Bacterioplankton_Mantel$Season, levels=SeasonOrder)
RepOrder2 <-
c("SU 633", "SU 651", "SU 665", "SU 692", "SU 713",
"AU 633", "AU 651", "AU 665", "AU 692", "AU 713",
"WI 633", "WI 651", "WI 665", "WI 692", "WI 713",
"SP 633", "SP 651", "SP 665", "SP 692", "SP 713")
Bacterioplankton_Mantel$Reps <- factor(Bacterioplankton_Mantel$Reps, levels=RepOrder2)
Bacterioplankton_Mantel$Species <- factor(Bacterioplankton_Mantel$Species, levels =c("OE", "GC"))
Mantel_plot <- Bacterioplankton_Mantel %>%
ggplot(., aes(x=Species, y=Reps, fill=Mantel_Statistic )) +
geom_tile() +
scale_fill_gradient2(
low = "#FFFFFF",
high = "#006000",
limit = c(-0.2,1)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, size = 15, face ="bold"),
#axis.title.x.bottom = element_text(size=15,face = "bold"),
#axis.title.y.left = element_text(size=15,face = "bold"),
strip.text.x = element_text(size = 15, face = "bold"),
strip.text.x.bottom = element_text(size = 15,face = "bold"),
strip.text.y.left = element_text(size = 15,face = "bold"),
strip.text.y.right = element_text(size = 15,face = "bold"),
axis.title.x.bottom = element_blank(),
axis.title.y.left = element_blank(),
axis.text.x.bottom = element_text(face = "bold", angle = 45, hjust = 1),
axis.text.y.left = element_text(face = "bold", size = 15),
axis.text.y.right = element_text(face = "bold", size = 15),
legend.title = element_text(size = 15, face ="bold"),
legend.text = element_text(size = 15,face = "bold"),
plot.title = element_text(size = 15, face = "bold"),
plot.subtitle = element_text(size = 15),
plot.caption = element_text(size = 15)) +
labs(#title = "Correlation relationship between avg.dist Bray-Curtis matrices and varibles", y = "Sample kind",
fill="corr") +
labs(fill = "Mantel's r") +
#theme(legend.box.background = element_rect(color = "black"))
theme(legend.position="right", legend.key.width = unit(1, "cm"))
Mantel_plot <- Mantel_plot + geom_text(vjust=0.76,hjust=0.49, aes(label = paste0(c("","*")[(abs(P_Value) <= .05)+1],
c("","*")[(abs(P_Value) <= .001)+1], c("","*")[(abs(P_Value) <= .001)+1])))
# Mantel_plot <- Mantel_plot + facet_grid(. ~ factor(Season, levels = SeasonOrder), drop = TRUE, scale = #"free", space = "free_x")
Mantel_plot <- Mantel_plot + facet_grid(vars(Season), scale="free")
Mantel_plot <- cowplot::plot_grid(Mantel_plot, labels = c(""), ncol = 1)
Mantel_plot <- plot_grid(Mantel_plot, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
plot(Mantel_plot)
ggsave(Mantel_plot, filename = paste(paste("Mantel_test_Bacterioplankton_FishReps",
sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 4,height = 7)
colored_taxonomy_Fish <- pslist$Optics$colored_taxonomy_Fish
#################
#GroupofInterest#
#################
GroupOfInterest <- setdiff(Overlapping_ASV$ASV, pslist$Core_Fish$ASV)
Comparison_Name <- "Biomes_Overlapping_Taxa_noCore"
TaxLevel <- "ASV"
flipped <- T
for (Dataset in names(pslist)[grepl("ps_GCWF|ps_OEWF", names(pslist))]) {
require(plyr); require(dplyr); require(ggrepel); require(cowplot); require(phyloseq)
Datasetname <- sub("ps_", "", Dataset)
ps <- pslist[[Dataset]]
GroupOfInterest <- GroupOfInterest
FILENAME <- paste(paste(save_name, "Alpha_BarPlot", Comparison_Name, Datasetname, sep="_"), ".png", sep="")
if (Datasetname %in% c("GC", "OE")) {
WIDTH <- 10 + length(sample_names(pslist[[Dataset]])) *0.12
HEIGHT <- 10 #length(sample_names(pslist[[Dataset]])) *0.08
} else if (Datasetname %in% c("WF")) {
WIDTH <- 5 + length(sample_names(pslist[[Dataset]])) *0.12
HEIGHT <- 10 #length(sample_names(pslist[[Dataset]])) *0.08
} else {
WIDTH <- 10 + length(sample_names(pslist[[Dataset]])) *0.12
HEIGHT <- 10 #length(sample_names(pslist[[Dataset]])) *0.08
}
################################
#Create Relative Abundance Data#
################################
ps_alpha_barplot <- ps %>%
#tax_glom(taxrank = TaxLevel) %>%
transform_sample_counts(function(x) {x/sum(x)*100}) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
#filter(Abundance > 1) %>% # Filter out low abundance taxa
arrange(Genus) %>% # Sort data frame alphabetically by phylum
dplyr::arrange(desc(Abundance))
ps_alpha_barplot$ASV <- sub(".*:", "", ps_alpha_barplot$OTU)
#ps_alpha_barplot$ASV <- sub('\\.', ' ', ps_alpha_barplot$ASV)
############################
#Create TotalAbundance Data#
############################
# phylum_abundance <- ps_alpha_barplot %>%
# dplyr::group_by(.data[[TaxLevel]]) %>%
# dplyr::summarise(TotalAbundance = sum(Abundance))
# ordered_levels <- phylum_abundance %>%
# dplyr::arrange(desc(TotalAbundance)) %>%
# pull(.data[[TaxLevel]])
#
# ps_alpha_barplot$Taxa <- factor(ps_alpha_barplot[[TaxLevel]], levels = ordered_levels)
ps_alpha_barplot$SampleID <- factor(ps_alpha_barplot$SampleID, levels =
SSU_Samples[SSU_Samples %in% sample_names(pslist[[Dataset]])])
SSU_Samples[SSU_Samples %in% sample_names(pslist[[Dataset]])]
##########################
#Subset for Interest ASVs#
##########################
df <- ps_alpha_barplot #[ps_alpha_barplot$OTU %in% GroupOfInterest,]
#df <- df[df$OTU %in% GroupOfInterest,]
df$Abundance[!(df$OTU %in% GroupOfInterest)] <- 0
phylum_abundance <- df %>%
dplyr::group_by(.data[[TaxLevel]]) %>%
dplyr::summarise(TotalAbundance = sum(Abundance))
ordered_levels <- phylum_abundance %>%
dplyr::arrange(desc(TotalAbundance)) %>%
pull(.data[[TaxLevel]])
df$Taxa <- factor(df[[TaxLevel]], levels = ordered_levels)
Alpha_Diversity_df <- df
Alpha_Diversity_df$Colors <- NA
Alpha_Diversity_df$Reps <- factor(Alpha_Diversity_df$Reps, levels = RepOrder[RepOrder %in%
Alpha_Diversity_df$Reps])
#################################
#Update to colored_taxonomy_Fish#
#################################
taxa_levels <- c("Genus", "Family", "Order", "Class", "Phylum")
taxa_level2 <- c("Taxa", "Phylum2")
Alpha_Diversity_df$Color <- "grey"
# Loop through each row of Alpha_Diversity_df
for (i in 1:nrow(Alpha_Diversity_df)) {
matching_color <- NA # Initialize matching color to NA
# Loop through each taxonomic level
for (level in taxa_levels) {
matching_row <- colored_taxonomy_Fish[colored_taxonomy_Fish$Taxa %in%
Alpha_Diversity_df[[level]][i], ]
# If there's a match, update matching_color and exit the loop
if (nrow(matching_row) > 0) {
matching_color <- matching_row$Color
break
}
}
# If there's no match at any level, try matching without numbers
if (is.na(matching_color)) {
matching_row <- colored_taxonomy_Fish[gsub("\\d", "", colored_taxonomy_Fish$Phylum2) %in%
Alpha_Diversity_df[["Phylum"]][i], ]
if (nrow(matching_row) > 0) {
matching_color <- matching_row$Color
}
}
# Assign the matching color to the corresponding row in Alpha_Diversity_df
Alpha_Diversity_df$Color[i] <- matching_color
}
color_mapping <- setNames(Alpha_Diversity_df$Color, Alpha_Diversity_df$Taxa)
color_mapping[["Candidatus Megaira"]] <- "purple"
color_mapping[["Acinetobacter.lwoffii"]] <- "#996600"
color_mapping[["Acinetobacter.johnsonii"]] <- "#663300"
color_mapping[["Shewanella"]] <- "#FF3366"
color_mapping[["Shewanella.baltica"]] <- "#FF0099"
color_mapping[["Shewanella.putrefaciens"]] <-"#FF0066"
color_mapping[["Photobacterium"]] <- "#FFF000"
color_mapping[["Aeromonas"]] <- "#FFCC00"
levels(Alpha_Diversity_df$Taxa)[!levels(Alpha_Diversity_df$Taxa) %in% head(unique(sub(".*:", "", GroupOfInterest)), 34) ] <-
"Other"
if (flipped == FALSE) {
if (Datasetname == "WF") {
p <- ggplot(Alpha_Diversity_df,
aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Season, levels = SeasonOrder), drop = TRUE, scale = "free",
space = "free_x")
COL <- col.Palette$col.Palette.Season[names(col.Palette$col.Palette.Season) %in%
unique(sample_data(ps)$Season)]
} else {
p <- ggplot(Alpha_Diversity_df,
aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_y")
COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in% unique(sample_data(ps)$Reps)]
}
p <- p +
atheme +
ylab("Relative Abundance [%] \n") + xlab("") +
labs(fill="") +
scale_fill_manual(values = color_mapping) +
ylim(0, 100) +
#ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance, aes(label = Labels),
#inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
#awhite +
theme(strip.text.y = element_text(angle = 0)) +
#theme(axis.text.x=element_blank(),
#axis.text.y=element_blank(),
#axis.title.y.left = element_blank(),
#axis.ticks.y =element_blank()
#) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#legend.background = element_rect(fill='transparent'), #transparent legend bg
#legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=9),
legend.text=element_text(size=9),
axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
strip.text.y.left = element_text(size = 9,face = "bold"),
axis.title.y.left = element_text(size=12,face = "bold"))
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(COL, 0.8)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300,
width = WIDTH, height = 6)
} else if (flipped == TRUE) {
# New facet label names for supp variable
COL <- col.Palette$col.Palette.Reps[names(col.Palette$col.Palette.Reps) %in% unique(sample_data(ps)$Reps)]
Short.labs <- gsub("[^0-9]", "", Alpha_Diversity_df$Reps)
names(Short.labs) <- Alpha_Diversity_df$Reps
p <- ggplot(Alpha_Diversity_df, aes(x = SampleID, y = Abundance, fill = factor(Taxa))) +
geom_bar(stat = "identity") +
coord_flip() +
#facet_grid(rows = vars(Reps), drop = TRUE, scale = "free", space = "free_x", switch = "y")
facet_grid(rows = vars(Reps), drop = TRUE, scale = "free", space = "free_x", switch = "y",
labeller = labeller(Reps = Short.labs))
p <- p +
ylab("Relative Abundance [%] \n") + xlab("") +
labs(fill="") + #atheme+
scale_fill_manual(values = color_mapping) +
ylim(0, 100) +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
) +
theme(axis.title.x.bottom = element_text(color="grey13", size=15, face ="bold"),
strip.text = element_text(color = "black", face= "bold")) +
theme(
legend.title=element_text(size=9),
legend.text=element_text(size=9),
axis.text.x.bottom = element_text(size= 15, face = "bold", angle = 45, hjust = 1),
#strip.text.y.left = element_text(size = 9,face = "bold"),
#axis.title.y.left = element_text(size=12,face = "bold")
) +
theme(legend.position = "none")
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-l', g$layout$name))
fills <- alpha(COL, 0.8)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 2.7, height = HEIGHT)
plot(Alpha_Diversity_BarPlot)
}
}
first steps of alpha diversity analysis the dataset shows overall little patterns and high multicollinearity between influencing variables, therefore we decided to skip modeling alpha diversity
###################
#Phylum Statistics#
###################
Phylum_Statistics_list <- list()
for (Dataset in c(names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])], "ps_WF")) {
individual <- pslist[[Dataset]] %>% tax_glom(taxrank = "Phylum") %>%
transform_sample_counts(function(x) {x/sum(x)}) %>%
psmelt() %>%
#dplyr::filter(Abundance > 0.0005) %>%
dplyr::arrange("Phylum")
individual_abund <- as.data.frame(individual %>%
dplyr::select(Sample, SampleID, Replicates2, Phylum, Abundance) %>%
dplyr::group_by(Phylum, Sample) %>%
dplyr::summarise(TotalAbundance = (sum(Abundance))*100)) %>%
dplyr::ungroup()%>%
dplyr::left_join(individual %>%
dplyr::select(Sample, SampleID, Replicates2, Phylum, Abundance),
by = c("Sample", "Phylum"))
individual_abund_SD <- individual_abund %>%
dplyr::group_by(Phylum) %>%
dplyr::summarise(SD = round(sd(Abundance)*100, digits = 1))
merged_species <- merge_samples(pslist[[Dataset]], "Species")
# Use psmelt to obtain a long-format data.frame
merged_species <- merged_species %>% tax_glom(taxrank = "Phylum") %>%
transform_sample_counts(function(x) {x/sum(x)}) %>%
psmelt() %>%
#dplyr::filter(Abundance > 0.0005) %>%
dplyr::arrange("Phylum")
merged_species_abund <- as.data.frame(merged_species %>%
dplyr::select(Sample, SampleID, Phylum, Abundance) %>%
dplyr::group_by(Phylum, Sample) %>%
dplyr::summarise(TotalAbundance = round(sum(Abundance)*100,digits = 1)))
print(Dataset)
data <- merged_species_abund %>%
left_join(individual_abund_SD) %>%
dplyr::arrange(desc(TotalAbundance))
print(data)
Phylum_Statistics_list[[Dataset]] <- data
}
## [1] "ps_OE"
## Phylum Sample TotalAbundance SD
## 1 Proteobacteria OE 51.0 10.3
## 2 Bacteroidota OE 27.7 11.9
## 3 Actinobacteriota OE 6.8 5.3
## 4 Verrucomicrobiota OE 5.3 7.6
## 5 Firmicutes OE 4.0 7.1
## 6 Deinococcota OE 2.4 2.3
## 7 Chloroflexi OE 0.7 1.2
## 8 Acidobacteriota OE 0.4 0.5
## 9 Patescibacteria OE 0.4 0.4
## 10 Desulfobacterota OE 0.3 0.6
## 11 Cyanobacteria OE 0.2 0.4
## 12 Dependentiae OE 0.2 0.9
## 13 Nitrospirota OE 0.2 0.4
## 14 Bdellovibrionota OE 0.1 0.2
## 15 Myxococcota OE 0.1 0.2
## 16 Planctomycetota OE 0.1 0.2
## 17 Armatimonadota OE 0.0 0.1
## 18 Campylobacterota OE 0.0 0.3
## 19 Fusobacteriota OE 0.0 0.3
## 20 Gemmatimonadota OE 0.0 0.0
## 21 Spirochaetota OE 0.0 0.1
## [1] "ps_GC"
## Phylum Sample TotalAbundance SD
## 1 Proteobacteria GC 57.1 13.2
## 2 Bacteroidota GC 25.0 9.1
## 3 Actinobacteriota GC 5.9 4.8
## 4 Firmicutes GC 5.0 13.3
## 5 Verrucomicrobiota GC 2.6 2.3
## 6 Deinococcota GC 2.2 2.2
## 7 Acidobacteriota GC 0.5 0.7
## 8 Chloroflexi GC 0.5 0.8
## 9 Cyanobacteria GC 0.2 0.3
## 10 Desulfobacterota GC 0.2 0.3
## 11 Nitrospirota GC 0.2 0.3
## 12 Patescibacteria GC 0.2 0.3
## 13 Planctomycetota GC 0.2 0.2
## 14 Armatimonadota GC 0.1 0.1
## 15 Bdellovibrionota GC 0.0 0.1
## 16 Campylobacterota GC 0.0 0.1
## 17 Dependentiae GC 0.0 0.1
## 18 Fusobacteriota GC 0.0 0.0
## 19 Gemmatimonadota GC 0.0 0.0
## 20 Myxococcota GC 0.0 0.1
## 21 Spirochaetota GC 0.0 0.1
## [1] "ps_WF"
## Phylum Sample TotalAbundance SD
## 1 Proteobacteria WF 41.1 5.4
## 2 Bacteroidota WF 19.2 7.4
## 3 Actinobacteriota WF 19.1 8.8
## 4 Verrucomicrobiota WF 8.4 3.3
## 5 Acidobacteriota WF 2.3 1.8
## 6 Nitrospirota WF 1.9 1.4
## 7 Desulfobacterota WF 1.5 1.1
## 8 Cyanobacteria WF 1.4 1.9
## 9 Gemmatimonadota WF 1.4 0.6
## 10 Chloroflexi WF 1.2 0.9
## 11 Myxococcota WF 0.7 0.4
## 12 Planctomycetota WF 0.4 0.6
## 13 Bdellovibrionota WF 0.3 0.2
## 14 Firmicutes WF 0.3 0.4
## 15 Campylobacterota WF 0.2 0.4
## 16 Armatimonadota WF 0.1 0.1
## 17 Deinococcota WF 0.1 0.1
## 18 Dependentiae WF 0.1 0.0
## 19 Patescibacteria WF 0.1 0.0
## 20 Spirochaetota WF 0.1 0.1
## 21 Caldisericota WF 0.0 0.0
## 22 Calditrichota WF 0.0 0.0
## 23 Cloacimonadota WF 0.0 0.0
## 24 Deferrisomatota WF 0.0 0.0
## 25 Hydrogenedentes WF 0.0 0.0
## 26 Latescibacterota WF 0.0 0.0
## 27 Methylomirabilota WF 0.0 0.0
write.csv2(do.call(rbind, Phylum_Statistics_list), file.path(path_Output_16S,paste(paste(save_name,"Phylum_Statistics_list",Date, sep="_"), ".csv", sep="")))
Ratios of Proteobacteria and Bacteroidetes
Rosado, D., Pérez-Losada, M., Severino, R., Cable, J., & Xavier, R. (2019). Characterization of the skin and gill microbiomes of the farmed seabass (Dicentrarchus labrax) and seabream (Sparus aurata). Aquaculture, 500, 57-64.
https://f1000research.com/articles/5-1492
alphadiv_list <- list()
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE|ps_Fish|ps_SSU", names(pslist)[!grepl("WF", names(pslist))])]) {
require(phyloseq)
require(vegan)
require(plyr)
require(dplyr)
require(ggrepel)
require(cowplot)
Datasetname <- sub("ps_", "", Dataset)
a <- length(alphadiv_list)
Samples <- sample_names(pslist[[Dataset]])
ps <- merge_phyloseq(pslist$ps_OE, pslist$ps_GC, pslist$ps_WF)
ps <- prune_samples(sample_names(ps) %in% Samples, ps)
rowSums(otu_table(ps))
min(rowSums(otu_table(ps)))
max(rowSums(otu_table(ps)))
mean(rowSums(otu_table(ps)))
#Richness vs SequencingDepth#
ggplot(data = data.frame("total_reads" = phyloseq::sample_sums(ps),
"observed" = phyloseq::estimate_richness(ps, measures = "Observed")[, 1]),
aes(x = total_reads, y = observed)) +
geom_point() +
geom_smooth(method="lm", se = FALSE) +
labs(x = "\nTotal Reads", y = "Observed Richness\n")
if (Datasetname %in% c("GC", "OE", "GCWF", "OEWF", "Fish")) {
print(paste("Samples removed for low frequency below 5000 seqs in;", Dataset, sep = ""))
print(which(rowSums(otu_table(ps)) < 5000))
ps_Filter <- subset_samples(ps, sample_sums(ps) > 5000)
} else if (Datasetname %in% c("SSU")) {
print(paste("Samples removed for low frequency below 5000 seqs in;", Dataset, sep = ""))
print(which(rowSums(otu_table(ps)) < 5000))
ps_Filter <- subset_samples(ps, sample_sums(ps) > 5000)
} else if (Datasetname %in% c("WF")) {
print(paste("No Waterfiler Samples removed ;", Dataset, sep = ""))
print(rowSums(otu_table(ps)))
ps_Filter <- ps
}
ps_Filter_rare <- phyloseq::rarefy_even_depth(ps_Filter , rngseed = 123, replace = FALSE)
alphadiv_list[[a+1]] <- ps_Filter_rare
names(alphadiv_list)[[a+1]] <- paste("ps_rare", Datasetname, sep="_")
vegan::rarecurve(as.data.frame(otu_table(ps_Filter)), step=100, lwd=2, ylab="ASVs", label=F)
abline(v=(min(rowSums(as.data.frame(otu_table(ps_Filter))))))
vegan::rarecurve(as.data.frame(otu_table(ps_Filter_rare)), step=100, lwd=2, ylab="ASVs", label=F)
adiv <- data.frame(
"Observed" = phyloseq::estimate_richness(ps_Filter_rare, measures = "Observed"),
"Chao1" = phyloseq::estimate_richness(ps_Filter_rare, measures = "Chao1"),
"Shannon" = phyloseq::estimate_richness(ps_Filter_rare, measures = "Shannon"),
"InvSimpson" = phyloseq::estimate_richness(ps_Filter_rare, measures = "InvSimpson"),
"LOC" = phyloseq::sample_data(ps_Filter_rare)$LOC,
"Season" = phyloseq::sample_data(ps_Filter_rare)$Season,
"SampleID" = phyloseq::sample_data(ps_Filter_rare)$SampleID
)
alphadiv_list[[a+2]] <- adiv
names(alphadiv_list)[[a+2]] <- paste("adiv", Datasetname, sep="_")
}
## [1] "Samples removed for low frequency below 5000 seqs in;ps_SSU"
## GCAU21TWFL2 GCAU21MLEB1 WFSU21SSFL WFWI22MGEB
## 183 184 229 232
## [1] "Samples removed for low frequency below 5000 seqs in;ps_Fish"
## GCAU21TWFL2 GCAU21MLEB1
## 183 184
## [1] "Samples removed for low frequency below 5000 seqs in;ps_OE"
## named integer(0)
## [1] "Samples removed for low frequency below 5000 seqs in;ps_GC"
## GCAU21TWFL2 GCAU21MLEB1
## 45 46
pslist$AlphaDiversity <- alphadiv_list
alphadiv_list <- pslist$AlphaDiversity
DataExploration_list <- list()
for (Dataset in names(pslist)[!grepl("WF", names(pslist))][grepl("ps_GC|ps_OE", names(pslist)[!grepl("WF", names(pslist))])]) {
require(phyloseq)
require(vegan)
require(plyr)
require(dplyr)
require(ggrepel)
require(cowplot)
require(ggordiplots)
Datasetname <- sub("ps_", "", Dataset)
ps <- pslist[[Dataset]]
DataExploration_list[[paste(Datasetname)]] <- list()
GroupingVariables <- c("Season", "LOC")
Traits <- c(
"Salinity","O2_catch","SecchiDepth", "Temperature","pH_catch","Age",
"FCF", "HSI", "SSI", "Length", "Weigth", "GSI", "FillLevel",
"NH4", "NO2", "NO3", "O2", "PO4", "pH", "SPM",
GroupingVariables)
print(paste("Samples removed for low frequency below 5000 seqs in;", Datasetname, sep = ""))
print(which(rowSums(otu_table(ps)) < 5000))
ps_Filter <- subset_samples(ps, sample_sums(ps) > 5000)
'%!in%' <- function(x,y)!('%in%'(x,y))
Include <- Traits[Traits %!in% GroupingVariables]
df_Traits <- data.frame(sample_data(ps_Filter))
df_Traits <- df_Traits[names(df_Traits) %in% Traits]
df_Include <- df_Traits[names(df_Traits) %in% Include]
#Just to inspect how well FGG-Data fit to your measured values while fishing:
#df_Include[c("SPM", "SecchiDepth")]
#df_Include[c("pH_catch", "pH")]
#df_Include[c("O2_catch", "O2")]
print("NAs in TraitData")
print(table(is.na(df_Include)))
print((df_Include[!complete.cases(df_Include), ]))
df_Include <- df_Include[complete.cases(df_Include), ]
df_Traits <- df_Traits[complete.cases(df_Traits), ]
dim(df_Traits)
#df_Include <- vegan::decostand(df_Include, method='standardize')
dim(df_Include)
ps_Filter <- prune_samples(sample_names(ps_Filter)[sample_names(ps_Filter) %in%
rownames(df_Include[complete.cases(df_Include), ])], ps_Filter)
adiv <- pslist$AlphaDiversity[[paste("adiv", Datasetname, sep="_")]]
adiv <- adiv[rownames(adiv) %in% rownames(df_Include),]
model_df <- left_join(df_Include %>% mutate(SampleID = rownames(.)), adiv %>% mutate(SampleID = rownames(.)))
model_df$fSeason <- factor(model_df$Season, level = SeasonOrder)
model_df$fLOC <- factor(model_df$LOC, level = LOCOrder)
model_df$Species <- substr(model_df$SampleID, 1, nchar(model_df$SampleID) - 9)
model_df$Replicates <- substr(model_df$SampleID, 1, nchar(model_df$SampleID) - 3)
model_df$fReplicates <- factor(model_df$Replicates, levels = VariableOrder[VariableOrder %in% model_df$Replicates])
rownames(model_df) <- model_df$SampleID
print(Datasetname)
head(model_df)
DataExploration_list[[paste(Datasetname)]][[paste("ps_Filter")]] <- ps_Filter
DataExploration_list[[paste(Datasetname)]][[paste("model_df")]] <- model_df
}
## [1] "Samples removed for low frequency below 5000 seqs in;OE"
## named integer(0)
## [1] "NAs in TraitData"
##
## FALSE
## 2622
## [1] Temperature pH_catch Salinity O2_catch SecchiDepth Length
## [7] FillLevel Age HSI SSI GSI FCF
## [13] NH4 NO2 NO3 O2 PO4 pH
## [19] SPM
## <0 rows> (or 0-length row.names)
## [1] "OE"
## [1] "Samples removed for low frequency below 5000 seqs in;GC"
## GCAU21TWFL2 GCAU21MLEB1
## 45 46
## [1] "NAs in TraitData"
##
## FALSE
## 1634
## [1] Temperature pH_catch Salinity O2_catch SecchiDepth Length
## [7] FillLevel Age HSI SSI GSI FCF
## [13] NH4 NO2 NO3 O2 PO4 pH
## [19] SPM
## <0 rows> (or 0-length row.names)
## [1] "GC"
pslist$DataExploration_list <- DataExploration_list
###################
#Variable Violin#
###################
alphadiv_violin <- list()
for (Dataset in names(alphadiv_list)[grepl("adiv", names(alphadiv_list))]) {
require(plyr)
require(ggrepel)
require(cowplot)
require(EnvStats)
Variables <- c("Observed","Chao1.Chao1","Shannon", "InvSimpson")
Datasetname <- sub("adiv_", "", Dataset)
adiv <- alphadiv_list[[Dataset]]
for (ii in Variables) {
alphadiv_violin_length <- length(alphadiv_violin)
FILENAME <- paste(paste(save_name, Type, Datasetname, ii, "Violin", sep="_"),".png", sep="")
ggg <- paste(ii)
adiv$LOC <- factor(adiv$LOC, levels=LOCOrder)
p <- ggplot(adiv, aes(x=LOC,y=adiv[[ii]], colour=LOC)) +
#geom_boxplot(aes(fill=LOC)) + atheme +
geom_violin(aes(fill=LOC)) +
geom_jitter(aes(color = LOC)) +
scale_color_manual(values= ggplot2::alpha(col.Palette$col.Palette.LOC, 1)) +
scale_fill_manual(values= ggplot2::alpha(col.Palette$col.Palette.LOC, 0.4)) +
#geom_text_repel(aes(label = SampleID), data = adiv, size=2.5) +
facet_grid(. ~ factor(Season, levels=SeasonOrder), drop=TRUE,scale="free",space="free_x") +
labs(x = "", y = noquote(paste(ggg, " [", noquote(AlphaIndices[ii]), "]", sep = ""))) + #"")
#ggtitle(paste(gg, ggg, "over all sampling sites and seasons (Summer, Winter, Spring,)"))
theme(legend.position = "none") +
guides(fill=guide_legend(title="Species")) +
#stat_n_text(size = 4, color = "grey20")+ #white
theme(
panel.background = element_rect(fill='transparent'),
plot.background = element_rect(fill='transparent', color=NA),
#panel.grid.major = element_blank(),
#panel.grid.minor = element_blank(),
legend.background = element_rect(fill='transparent'),
legend.box.background = element_rect(fill='transparent') ) +
theme(
axis.title.y.left = element_text(size=10,face = "bold"),
strip.text.y.left = element_text(size = 10,face = "bold"),
axis.text.y.left = element_text(face = "bold"),
legend.title = element_text( size = 6),
legend.text = element_text(size = 6,face = "bold"),
plot.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 9),
plot.caption = element_text(size = 9),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
strip.text.x = element_text(face = "bold"))
COL <- col.Palette$col.Palette.Season[names(col.Palette$col.Palette.Season) %in% adiv$Season]
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(COL, 0.4)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
title <- ggdraw() + draw_label_themeRK(paste(Datasetname, ggg, "per Location"), element = "plot.title",x = 0.05, hjust = 0, vjust = 0.5) #draw_label_themeRKwhite
alphadiv_violin_plot<- cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.05, 0.98)) #title,
plot(alphadiv_violin_plot)
ggsave(alphadiv_violin_plot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 5,
height = 2.8)
alphadiv_violin[[alphadiv_violin_length+1]] <- alphadiv_violin_plot
names(alphadiv_violin)[[alphadiv_violin_length+1]] <- paste(Datasetname, ii, sep="_")
}
}
##############
#Summary Plot#
##############
cowplot::plot_grid(alphadiv_violin$OE_Observed, alphadiv_violin$GC_Observed,
labels = c("A", "B"), ncol = 1) -> part_1
cowplot::plot_grid(alphadiv_violin$OE_Shannon, alphadiv_violin$GC_Shannon,
labels = c("C", "D"), ncol = 1) -> part_2
cowplot::plot_grid(part_1, part_2, labels = c("", ""), rel_widths = c(1,1), ncol = 2) -> part_3
ggsave(part_3, filename = paste(paste(Species, Year, Season, "AlphaDiv_Violin", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 13,height = 6)
part_3
#-
###########
#Save Data#
###########
saveRDS(pslist, file.path(path_Output_16S, paste(paste(save_name, "ps_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))